This data set was generated by Reddit user gigantoir https://www.reddit.com/r/NFLstatheads/comments/q73yd0/nfl_scores_20172020/ I added the 2021 data that was scrapped from https://www.footballdb.com/games/index.html and 2002-2016 data from Reddit user yuxbni76 https://www.reddit.com/user/yuxbni76
Scores <- read.csv("NFL_SCORES_2002-2021.txt", header=TRUE, sep= "\t")
Scores$Score_differential <- abs(Scores$Score_away - Scores$Score_home)
Scores$Week <- as.factor(Scores$Week)
summary(Scores)
## Year Week Home Away
## Min. :2002 2 : 323 Length:5044 Length:5044
## 1st Qu.:2006 1 : 313 Class :character Class :character
## Median :2011 12 : 313 Mode :character Mode :character
## Mean :2011 3 : 312
## 3rd Qu.:2016 14 : 304
## Max. :2021 15 : 304
## (Other):3175
## Score_home Score_away Score_differential
## Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.:16.0 1st Qu.:14.00 1st Qu.: 4.0
## Median :23.0 Median :21.00 Median : 8.0
## Mean :23.3 Mean :21.14 Mean :11.7
## 3rd Qu.:30.0 3rd Qu.:28.00 3rd Qu.:17.0
## Max. :62.0 Max. :59.00 Max. :59.0
##
Team colors were extracted from https://teamcolorcodes.com, I took the first primary color for each team and created a list that will be for later use. For the Browns and Titans I took the secondary color as it seemed more appropriate.
Team_colors <- c("SF"="#AA0000",
"CHI"="#0B162A",
"CIN"="#FB4F14",
"BUF"="#00338D",
"DEN"="#FB4F14",
"CLE"="#FF3C00",
"TB"="#D50A0A",
"ARI"="#97233F",
"LAC"="#0080C6",
"KC"="#E31837",
"IND"="#002C5F",
"DAL"="#041E42",
"MIA"="#008E97",
"PHI"="#004C54",
"ATL"="#A71930",
"NYG"="#0B2265",
"JAX"="#006778",
"NYJ"="#125740",
"DET"="#0076B6",
"GB"="#203731",
"CAR"="#0085CA",
"NE"="#002244",
"LV"="#000000",
"LA"="#003594",
"BAL"="#241773",
"WAS"="#773141",
"NO"="#D3BC8D",
"SEA"="#002244",
"PIT"="#FFB612",
"HOU"="#03202F",
"TEN"="#4B92DB",
"MIN"="#4F2683")
ggplot(Scores, aes(x=Week, y=Score_differential)) +
geom_boxplot() +
scale_x_discrete(breaks = seq(1,17,1), guide = guide_axis(n.dodge=2)) +
facet_wrap(~Year) +
ylab("Absolute Score Differential")
ggplot(Scores, aes(x=Week, y=Score_differential)) +
geom_smooth(aes(as.numeric(Week), Score_differential), method = "loess") +
scale_x_continuous(breaks = c(1,9,17)) +
scale_y_continuous(breaks = seq(7,21,7)) +
facet_wrap(~Year) +
ylab("Absolute Score Differential")
## `geom_smooth()` using formula 'y ~ x'
Score_varience <- c(var(Scores[Scores$Year == 2002, ]$Score_differential),
var(Scores[Scores$Year == 2003, ]$Score_differential),
var(Scores[Scores$Year == 2004, ]$Score_differential),
var(Scores[Scores$Year == 2005, ]$Score_differential),
var(Scores[Scores$Year == 2006, ]$Score_differential),
var(Scores[Scores$Year == 2007, ]$Score_differential),
var(Scores[Scores$Year == 2008, ]$Score_differential),
var(Scores[Scores$Year == 2009, ]$Score_differential),
var(Scores[Scores$Year == 2010, ]$Score_differential),
var(Scores[Scores$Year == 2011, ]$Score_differential),
var(Scores[Scores$Year == 2012, ]$Score_differential),
var(Scores[Scores$Year == 2013, ]$Score_differential),
var(Scores[Scores$Year == 2014, ]$Score_differential),
var(Scores[Scores$Year == 2015, ]$Score_differential),
var(Scores[Scores$Year == 2016, ]$Score_differential),
var(Scores[Scores$Year == 2017, ]$Score_differential),
var(Scores[Scores$Year == 2018, ]$Score_differential),
var(Scores[Scores$Year == 2019, ]$Score_differential),
var(Scores[Scores$Year == 2020, ]$Score_differential),
var(Scores[Scores$Year == 2021, ]$Score_differential))
Score_varience <- as.data.frame(Score_varience)
Score_varience$Year <- c(2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,
2012,2013,2014,2015,2016,2017,2018,2019,2020,2021)
Score_varience$Year <- as.factor(Score_varience$Year)
ggplot(Score_varience, aes(x=Year, y=Score_varience)) +
geom_point(size = 3) +
geom_line(aes(x=as.numeric(Year))) +
theme(axis.text.x = element_text(angle = 45, hjust=1)) +
ylab("Score Differential Varience")
p <- ggplot(Scores, aes(x=Year, y=Score_differential, color = Home, label2=Score_home, label3=Score_away)) +
geom_smooth(aes(as.numeric(Year), Score_differential), se=FALSE, method = "loess", formula = y ~ x) +
scale_color_manual(values = Team_colors, name = "Team") +
ylab("Absolute Score Differential")
#scale_x_continuous(breaks = seq(1,17,1), guide = guide_axis(n.dodge=2)) +
#facet_wrap(~Year)
plot <- ggplotly(p, tooltip = c('Home', 'Score_home', 'Score_away'))
plot